In [1]:
import pandas as pd
import numpy as np

Importing Breast Cancer dataset¶

In [2]:
data = pd.read_csv("data.csv")

Basic understanding about Breast Cancer Wisconsin (Diagnostic) DataSet¶

In [43]:
print("Top 10 rows :\n")
data.head(10)
Top 10 rows :

Out[43]:
id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean ... radius_worst texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst
19 8510426 0 13.540 14.36 87.46 566.3 0.09779 0.08129 0.06664 0.04781 ... 15.11 19.26 99.70 711.2 0.14400 0.17730 0.23900 0.12880 0.2977 0.07259
20 8510653 0 13.080 15.71 85.63 520.0 0.10750 0.12700 0.04568 0.03110 ... 14.50 20.49 96.09 630.5 0.13120 0.27760 0.18900 0.07283 0.3184 0.08183
21 8510824 0 9.504 12.44 60.34 273.9 0.10240 0.06492 0.02956 0.02076 ... 10.23 15.66 65.13 314.9 0.13240 0.11480 0.08867 0.06227 0.2450 0.07773
37 854941 0 13.030 18.42 82.61 523.8 0.08983 0.03766 0.02562 0.02923 ... 13.30 22.81 84.46 545.9 0.09701 0.04619 0.04833 0.05013 0.1987 0.06169
40 855167 1 13.440 21.58 86.18 563.0 0.08162 0.06031 0.03110 0.02031 ... 15.93 30.25 102.50 787.9 0.10940 0.20430 0.20850 0.11120 0.2994 0.07146
43 856106 1 13.280 20.28 87.32 545.2 0.10410 0.14360 0.09847 0.06158 ... 17.38 28.00 113.10 907.2 0.15300 0.37240 0.36640 0.14920 0.3739 0.10270
48 857155 0 12.050 14.63 78.04 449.3 0.10310 0.09092 0.06592 0.02749 ... 13.76 20.70 89.88 582.6 0.14940 0.21560 0.30500 0.06548 0.2747 0.08301
49 857156 0 13.490 22.30 86.91 561.0 0.08752 0.07698 0.04751 0.03384 ... 15.15 31.82 99.00 698.8 0.11620 0.17110 0.22820 0.12820 0.2871 0.06917
50 857343 0 11.760 21.60 74.72 427.9 0.08637 0.04966 0.01657 0.01115 ... 12.98 25.72 82.98 516.5 0.10850 0.08615 0.05523 0.03715 0.2433 0.06563
51 857373 0 13.640 16.34 87.21 571.8 0.07685 0.06059 0.01857 0.01723 ... 14.67 23.19 96.08 656.7 0.10890 0.15820 0.10500 0.08586 0.2346 0.08025

10 rows × 32 columns

In [4]:
Total_Rows = data.shape[0]
Total_Cols = data.shape[1]
print("Total Rows is :",Total_Rows)
print("Total columns is :", Total_Cols)
Total Rows is : 569
Total columns is : 33
In [5]:
print( "\n Information about the Titanic dataset : \n " )
data.info()
 Information about the Titanic dataset : 
 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             569 non-null    float64
 15  area_se                  569 non-null    float64
 16  smoothness_se            569 non-null    float64
 17  compactness_se           569 non-null    float64
 18  concavity_se             569 non-null    float64
 19  concave points_se        569 non-null    float64
 20  symmetry_se              569 non-null    float64
 21  fractal_dimension_se     569 non-null    float64
 22  radius_worst             569 non-null    float64
 23  texture_worst            569 non-null    float64
 24  perimeter_worst          569 non-null    float64
 25  area_worst               569 non-null    float64
 26  smoothness_worst         569 non-null    float64
 27  compactness_worst        569 non-null    float64
 28  concavity_worst          569 non-null    float64
 29  concave points_worst     569 non-null    float64
 30  symmetry_worst           569 non-null    float64
 31  fractal_dimension_worst  569 non-null    float64
 32  Unnamed: 32              0 non-null      float64
dtypes: float64(31), int64(1), object(1)
memory usage: 146.8+ KB
In [6]:
print("duplicate values is : ",data.duplicated().sum())
duplicate values is :  0
In [ ]:
 

Columns distribution¶

In [7]:
print("Total columns is here :\n", data.columns.tolist())
Total columns is here :
 ['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean', 'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se', 'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst', 'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32']
In [8]:
numerical_col = data.select_dtypes(include=["int64","float64"]).columns
print("Total Numerical columns list is here :\n", numerical_col)
print("\nTotal Numerical columns is here :\n" ,numerical_col.value_counts().sum())
Total Numerical columns list is here :
 Index(['id', 'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
       'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

Total Numerical columns is here :
 32
In [9]:
categorical_col = data.select_dtypes(include=["O"]).columns
print("Total categorical columns  list is here :\n", categorical_col)
print("\nTotal categorical columns is here :\n" ,categorical_col.value_counts().sum())
Total categorical columns  list is here :
 Index(['diagnosis'], dtype='object')

Total categorical columns is here :
 1
In [ ]:
 

basic statistics about the Breast Cancer data :¶

In [10]:
data.describe().T
Out[10]:
count mean std min 25% 50% 75% max
id 569.0 3.037183e+07 1.250206e+08 8670.000000 869218.000000 906024.000000 8.813129e+06 9.113205e+08
radius_mean 569.0 1.412729e+01 3.524049e+00 6.981000 11.700000 13.370000 1.578000e+01 2.811000e+01
texture_mean 569.0 1.928965e+01 4.301036e+00 9.710000 16.170000 18.840000 2.180000e+01 3.928000e+01
perimeter_mean 569.0 9.196903e+01 2.429898e+01 43.790000 75.170000 86.240000 1.041000e+02 1.885000e+02
area_mean 569.0 6.548891e+02 3.519141e+02 143.500000 420.300000 551.100000 7.827000e+02 2.501000e+03
smoothness_mean 569.0 9.636028e-02 1.406413e-02 0.052630 0.086370 0.095870 1.053000e-01 1.634000e-01
compactness_mean 569.0 1.043410e-01 5.281276e-02 0.019380 0.064920 0.092630 1.304000e-01 3.454000e-01
concavity_mean 569.0 8.879932e-02 7.971981e-02 0.000000 0.029560 0.061540 1.307000e-01 4.268000e-01
concave points_mean 569.0 4.891915e-02 3.880284e-02 0.000000 0.020310 0.033500 7.400000e-02 2.012000e-01
symmetry_mean 569.0 1.811619e-01 2.741428e-02 0.106000 0.161900 0.179200 1.957000e-01 3.040000e-01
fractal_dimension_mean 569.0 6.279761e-02 7.060363e-03 0.049960 0.057700 0.061540 6.612000e-02 9.744000e-02
radius_se 569.0 4.051721e-01 2.773127e-01 0.111500 0.232400 0.324200 4.789000e-01 2.873000e+00
texture_se 569.0 1.216853e+00 5.516484e-01 0.360200 0.833900 1.108000 1.474000e+00 4.885000e+00
perimeter_se 569.0 2.866059e+00 2.021855e+00 0.757000 1.606000 2.287000 3.357000e+00 2.198000e+01
area_se 569.0 4.033708e+01 4.549101e+01 6.802000 17.850000 24.530000 4.519000e+01 5.422000e+02
smoothness_se 569.0 7.040979e-03 3.002518e-03 0.001713 0.005169 0.006380 8.146000e-03 3.113000e-02
compactness_se 569.0 2.547814e-02 1.790818e-02 0.002252 0.013080 0.020450 3.245000e-02 1.354000e-01
concavity_se 569.0 3.189372e-02 3.018606e-02 0.000000 0.015090 0.025890 4.205000e-02 3.960000e-01
concave points_se 569.0 1.179614e-02 6.170285e-03 0.000000 0.007638 0.010930 1.471000e-02 5.279000e-02
symmetry_se 569.0 2.054230e-02 8.266372e-03 0.007882 0.015160 0.018730 2.348000e-02 7.895000e-02
fractal_dimension_se 569.0 3.794904e-03 2.646071e-03 0.000895 0.002248 0.003187 4.558000e-03 2.984000e-02
radius_worst 569.0 1.626919e+01 4.833242e+00 7.930000 13.010000 14.970000 1.879000e+01 3.604000e+01
texture_worst 569.0 2.567722e+01 6.146258e+00 12.020000 21.080000 25.410000 2.972000e+01 4.954000e+01
perimeter_worst 569.0 1.072612e+02 3.360254e+01 50.410000 84.110000 97.660000 1.254000e+02 2.512000e+02
area_worst 569.0 8.805831e+02 5.693570e+02 185.200000 515.300000 686.500000 1.084000e+03 4.254000e+03
smoothness_worst 569.0 1.323686e-01 2.283243e-02 0.071170 0.116600 0.131300 1.460000e-01 2.226000e-01
compactness_worst 569.0 2.542650e-01 1.573365e-01 0.027290 0.147200 0.211900 3.391000e-01 1.058000e+00
concavity_worst 569.0 2.721885e-01 2.086243e-01 0.000000 0.114500 0.226700 3.829000e-01 1.252000e+00
concave points_worst 569.0 1.146062e-01 6.573234e-02 0.000000 0.064930 0.099930 1.614000e-01 2.910000e-01
symmetry_worst 569.0 2.900756e-01 6.186747e-02 0.156500 0.250400 0.282200 3.179000e-01 6.638000e-01
fractal_dimension_worst 569.0 8.394582e-02 1.806127e-02 0.055040 0.071460 0.080040 9.208000e-02 2.075000e-01
Unnamed: 32 0.0 NaN NaN NaN NaN NaN NaN NaN
In [11]:
data.describe(include="all").T
Out[11]:
count unique top freq mean std min 25% 50% 75% max
id 569.0 NaN NaN NaN 30371831.432337 125020585.612224 8670.0 869218.0 906024.0 8813129.0 911320502.0
diagnosis 569 2 B 357 NaN NaN NaN NaN NaN NaN NaN
radius_mean 569.0 NaN NaN NaN 14.127292 3.524049 6.981 11.7 13.37 15.78 28.11
texture_mean 569.0 NaN NaN NaN 19.289649 4.301036 9.71 16.17 18.84 21.8 39.28
perimeter_mean 569.0 NaN NaN NaN 91.969033 24.298981 43.79 75.17 86.24 104.1 188.5
area_mean 569.0 NaN NaN NaN 654.889104 351.914129 143.5 420.3 551.1 782.7 2501.0
smoothness_mean 569.0 NaN NaN NaN 0.09636 0.014064 0.05263 0.08637 0.09587 0.1053 0.1634
compactness_mean 569.0 NaN NaN NaN 0.104341 0.052813 0.01938 0.06492 0.09263 0.1304 0.3454
concavity_mean 569.0 NaN NaN NaN 0.088799 0.07972 0.0 0.02956 0.06154 0.1307 0.4268
concave points_mean 569.0 NaN NaN NaN 0.048919 0.038803 0.0 0.02031 0.0335 0.074 0.2012
symmetry_mean 569.0 NaN NaN NaN 0.181162 0.027414 0.106 0.1619 0.1792 0.1957 0.304
fractal_dimension_mean 569.0 NaN NaN NaN 0.062798 0.00706 0.04996 0.0577 0.06154 0.06612 0.09744
radius_se 569.0 NaN NaN NaN 0.405172 0.277313 0.1115 0.2324 0.3242 0.4789 2.873
texture_se 569.0 NaN NaN NaN 1.216853 0.551648 0.3602 0.8339 1.108 1.474 4.885
perimeter_se 569.0 NaN NaN NaN 2.866059 2.021855 0.757 1.606 2.287 3.357 21.98
area_se 569.0 NaN NaN NaN 40.337079 45.491006 6.802 17.85 24.53 45.19 542.2
smoothness_se 569.0 NaN NaN NaN 0.007041 0.003003 0.001713 0.005169 0.00638 0.008146 0.03113
compactness_se 569.0 NaN NaN NaN 0.025478 0.017908 0.002252 0.01308 0.02045 0.03245 0.1354
concavity_se 569.0 NaN NaN NaN 0.031894 0.030186 0.0 0.01509 0.02589 0.04205 0.396
concave points_se 569.0 NaN NaN NaN 0.011796 0.00617 0.0 0.007638 0.01093 0.01471 0.05279
symmetry_se 569.0 NaN NaN NaN 0.020542 0.008266 0.007882 0.01516 0.01873 0.02348 0.07895
fractal_dimension_se 569.0 NaN NaN NaN 0.003795 0.002646 0.000895 0.002248 0.003187 0.004558 0.02984
radius_worst 569.0 NaN NaN NaN 16.26919 4.833242 7.93 13.01 14.97 18.79 36.04
texture_worst 569.0 NaN NaN NaN 25.677223 6.146258 12.02 21.08 25.41 29.72 49.54
perimeter_worst 569.0 NaN NaN NaN 107.261213 33.602542 50.41 84.11 97.66 125.4 251.2
area_worst 569.0 NaN NaN NaN 880.583128 569.356993 185.2 515.3 686.5 1084.0 4254.0
smoothness_worst 569.0 NaN NaN NaN 0.132369 0.022832 0.07117 0.1166 0.1313 0.146 0.2226
compactness_worst 569.0 NaN NaN NaN 0.254265 0.157336 0.02729 0.1472 0.2119 0.3391 1.058
concavity_worst 569.0 NaN NaN NaN 0.272188 0.208624 0.0 0.1145 0.2267 0.3829 1.252
concave points_worst 569.0 NaN NaN NaN 0.114606 0.065732 0.0 0.06493 0.09993 0.1614 0.291
symmetry_worst 569.0 NaN NaN NaN 0.290076 0.061867 0.1565 0.2504 0.2822 0.3179 0.6638
fractal_dimension_worst 569.0 NaN NaN NaN 0.083946 0.018061 0.05504 0.07146 0.08004 0.09208 0.2075
Unnamed: 32 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [12]:
data.describe(include="O").T
Out[12]:
count unique top freq
diagnosis 569 2 B 357
In [ ]:
 

Visualizations:¶

In [13]:
import matplotlib.pyplot as plt
import seaborn as sns

data.hist(numerical_col, figsize=(40,20))

plt.ylabel("Frequency")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [38]:
sns.pairplot(data)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [15]:
correlation = data.corr(numeric_only=True)

plt.figure(figsize=(43,35))
sns.heatmap(correlation , annot=True  ,cmap="coolwarm", fmt=".2f")
plt.title("Correlation Metrix")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [16]:
for col in data:
    plt.figure(figsize=(6,2))
    sns.countplot(x=data[col],color="gold")
    plt.title(f"Boxplot of {col}")
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
 

Preprocessing :¶

1. Handle null values:¶

In [17]:
data.isna().sum()
Out[17]:
id                           0
diagnosis                    0
radius_mean                  0
texture_mean                 0
perimeter_mean               0
area_mean                    0
smoothness_mean              0
compactness_mean             0
concavity_mean               0
concave points_mean          0
symmetry_mean                0
fractal_dimension_mean       0
radius_se                    0
texture_se                   0
perimeter_se                 0
area_se                      0
smoothness_se                0
compactness_se               0
concavity_se                 0
concave points_se            0
symmetry_se                  0
fractal_dimension_se         0
radius_worst                 0
texture_worst                0
perimeter_worst              0
area_worst                   0
smoothness_worst             0
compactness_worst            0
concavity_worst              0
concave points_worst         0
symmetry_worst               0
fractal_dimension_worst      0
Unnamed: 32                569
dtype: int64
In [18]:
#removing last column because it contains no meaningful Value
data = data.drop("Unnamed: 32",axis=1)

2. Handle categorical cloumns¶

In [19]:
categorical_col
Out[19]:
Index(['diagnosis'], dtype='object')
In [20]:
from sklearn.preprocessing import LabelEncoder

lb = LabelEncoder()
data.diagnosis = lb.fit_transform(data.diagnosis)
data
Out[20]:
id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean ... radius_worst texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst
0 842302 1 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.30010 0.14710 ... 25.380 17.33 184.60 2019.0 0.16220 0.66560 0.7119 0.2654 0.4601 0.11890
1 842517 1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.08690 0.07017 ... 24.990 23.41 158.80 1956.0 0.12380 0.18660 0.2416 0.1860 0.2750 0.08902
2 84300903 1 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.19740 0.12790 ... 23.570 25.53 152.50 1709.0 0.14440 0.42450 0.4504 0.2430 0.3613 0.08758
3 84348301 1 11.42 20.38 77.58 386.1 0.14250 0.28390 0.24140 0.10520 ... 14.910 26.50 98.87 567.7 0.20980 0.86630 0.6869 0.2575 0.6638 0.17300
4 84358402 1 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.19800 0.10430 ... 22.540 16.67 152.20 1575.0 0.13740 0.20500 0.4000 0.1625 0.2364 0.07678
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
564 926424 1 21.56 22.39 142.00 1479.0 0.11100 0.11590 0.24390 0.13890 ... 25.450 26.40 166.10 2027.0 0.14100 0.21130 0.4107 0.2216 0.2060 0.07115
565 926682 1 20.13 28.25 131.20 1261.0 0.09780 0.10340 0.14400 0.09791 ... 23.690 38.25 155.00 1731.0 0.11660 0.19220 0.3215 0.1628 0.2572 0.06637
566 926954 1 16.60 28.08 108.30 858.1 0.08455 0.10230 0.09251 0.05302 ... 18.980 34.12 126.70 1124.0 0.11390 0.30940 0.3403 0.1418 0.2218 0.07820
567 927241 1 20.60 29.33 140.10 1265.0 0.11780 0.27700 0.35140 0.15200 ... 25.740 39.42 184.60 1821.0 0.16500 0.86810 0.9387 0.2650 0.4087 0.12400
568 92751 0 7.76 24.54 47.92 181.0 0.05263 0.04362 0.00000 0.00000 ... 9.456 30.37 59.16 268.6 0.08996 0.06444 0.0000 0.0000 0.2871 0.07039

569 rows × 32 columns

3. detect Outlines and Remove¶

In [21]:
for i in data:
    q1 = data[i].quantile(0.25)
    q3 = data[i].quantile(0.75)
    iqr = q3 - q1 
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    data = data [(data[i] >= lower) & (data[i ] <= upper)]
In [22]:
for col in data:
    plt.figure(figsize=(6, 2))
    sns.boxplot(x=data[col], color="green")
    plt.title(f"Boxplot of {col}")
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Feature Engineering:¶

In [23]:
from sklearn.model_selection import train_test_split

x = data.drop("diagnosis",axis=1)
y = data.diagnosis

x_train,x_test,y_train,y_test = train_test_split(x ,y, test_size=0.2 , random_state=10)
In [24]:
y.value_counts()
Out[24]:
diagnosis
0    219
1     14
Name: count, dtype: int64
In [25]:
target_var = y.map({0: "Benign", 1: "Malignant"}).value_counts()

plt.bar(target_var.index, target_var.values, color="red")
plt.xlabel("Diagnosis")
plt.ylabel("Count")
plt.title("Diagnosis Distribution")
plt.show()
No description has been provided for this image

Feature Scaling¶

In [26]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

Logistic Regression model import¶

In [27]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_train,y_train)
Out[27]:
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()

prediction¶

In [28]:
y_pred = model.predict(x_test)
y_pred
Out[28]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0])

model Evaluation¶

In [37]:
from sklearn.metrics import confusion_matrix , precision_score ,auc, roc_curve,roc_auc_score,recall_score,classification_report ,accuracy_score

# Accuracy
accuracy = accuracy_score(y_test,y_pred)
print("\nAcuuracy score is :\n",accuracy)

#confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix is:\n", cm)

#precision
precision = precision_score(y_test, y_pred)  # use 'M' if diagnosis is 'M' or 'B'
print("\nPrecision score is :", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("\nRecall score is :", recall)


# ROC AUC Score
y_proba = model.predict_proba(x_test)[:, 1] 
roc_auc = roc_auc_score(y_test, y_proba)
print("\nROC AUC Score is:", roc_auc)

# clasification report
cr = classification_report(y_test, y_pred)
print("\n classification report is :\n", cr)
Acuuracy score is :
 0.9574468085106383

Confusion Matrix is:
 [[43  0]
 [ 2  2]]

Precision score is : 1.0

Recall score is : 0.5

ROC AUC Score is: 0.9941860465116279

 classification report is :
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        43
           1       1.00      0.50      0.67         4

    accuracy                           0.96        47
   macro avg       0.98      0.75      0.82        47
weighted avg       0.96      0.96      0.95        47

In [30]:
cm = confusion_matrix(y_test, y_pred)

sns.heatmap(cm , annot=True  ,cmap="coolwarm", fmt=".2f")
plt.title("Confusion Metrix")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [31]:
y_proba = model.predict_proba(x_test)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', label=f'ROC Curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', linestyle='--') 

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc="lower right")
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]: